


cd "../replication-package"

*---------------------------------------------------------------------------*
**# ---------- 3. Occupational pay gap: women's work valuation ---------- #**
*---------------------------------------------------------------------------*
* Target measure: ratio of median wages, occupations 60%+ male vs. 60%+ female

/*
   Plan:
   1. count the women and men in every occupation X year cell
   2. flag occupations that are 60%+ male or 60%+ female
   3. merge those flags back onto the person-level data
   4. by state X year, take the median wage of each occupation group
   5. divide one median by the other to get the ratio
*/


* ---- Load the CPS extract and restrict the sample ----
use "data/raw/cps_00088.dta", clear
tabulate asecflag // inspection only: the ASEC flag is tabulated, no rows are dropped here
drop if !inrange(year, 1976, 2021) // survey years; earnings refer to the previous year

* ---- Sample restriction and top-code lookup ----
// NOTE(review): the original sub-heading here read "female representation in
// the top wage decile"; nothing in this section computes a decile, so it
// looks carried over from another script.
// Reported wages refer to the year before the survey (Census and ACS do the
// same), so an ACS-based wage series would stop at 2018.

// working-age population only
drop if !inrange(age, 18, 65)

// attach the year-specific top-coding values, look at the match pattern,
// then keep only the records that matched on both sides
merge m:1 year using "data/ipums_topcoding.dta", generate(m_topcode)
tabulate year m_topcode
keep if m_topcode == 3
drop m_topcode

* cleaning income: this part already adjusts for the 'previous year' earnings part 
cap program drop cpsincome 
program define cpsincome 
	/*
	   cpsincome: build hourly-earnings measures from the CPS income variables
	   currently in memory.

	   Reads  : incwage inclongj oincwage, the matching *_top top-code
	            variables, year, wkswork1, uhrsworkly, srcearn, cpi99
	   Creates: tc_incwage tc_inclongj tc_oincwage   (NIU removed, top-codes x1.5)
	            hrly_earn hrly_earn99 hrly_earn19     (nominal / 1999 USD / 2019 USD)
	            hrearn_v1  all hourly earnings, 2019 USD
	            hrearn_v2  wage/salary only (srcearn==1 from earnings year 1987)
	            hrearn_v3  hrearn_v2 truncated at $1 and $100 in 1979 USD
	            hrearn_1979  hrearn_v2 in 1979 USD, after truncation
	   Changes: uhrsworkly (999 recoded to missing);
	            year (shifted back by one so it is the year earnings were received)

	   Takes no arguments. It generates new variables, so it fails if run
	   twice on the same data.
	*/

	// Deflator constants. The inverse conversions below are derived from
	// these so that a round trip between price levels is exactly neutral.
	local usd99_to_19 = 1.535   // 1999 USD -> 2019 USD
	local usd79_to_99 = 2.295   // 1979 USD -> 1999 USD (https://cps.ipums.org/cps/cpi99.shtml)
	local usd79_to_19 = `usd79_to_99'*`usd99_to_19'

	*** 1. Top-coding & CPI adjustment (universal to all specifications)

	foreach incvar in incwage inclongj oincwage {
		recode `incvar' (99999999=.), gen(tc_`incvar') // 99999999 = NIU 
	}

	// through survey year 1987: multiply top-coded incwage by 1.5
	replace tc_incwage = tc_incwage*1.5 if tc_incwage>=incwage_top & tc_incwage!=. & year<=1987
	// 'incwage_top' given by the merged data with top-coding values 

	// 1988 and onward, 'incwage' is the sum of inclongj and oincwage 
	// https://cps.ipums.org/cps-action/variables/INCWAGE#description_section
	foreach incvar in inclongj oincwage {
		replace tc_`incvar' = tc_`incvar'*1.5 if tc_`incvar'>=`incvar'_top & tc_`incvar'!=. & year>=1988	
	}
	replace tc_incwage = tc_inclongj + tc_oincwage if year>=1988

	sum tc_*

	*** Calculate hourly earnings 
	// weeks worked last year? 
	tab wkswork1 srcearn, m
		//: note that weeks worked include non-wage working 
	
	// usual hours worked last year? 
	recode uhrsworkly (999=.)
	bys srcearn: sum uhrsworkly if inrange(srcearn, 1, 4)
		//: note that usual hours worked include non-wage working 

	// calculate hourly earnings 
	// (zero weeks or hours gives division by zero, which Stata stores as missing)
	gen hrly_earn = tc_incwage / (wkswork1*uhrsworkly)
	sum hrly_earn

	// CPI adjustment 
	// use 2019 USD
	gen hrly_earn99 = hrly_earn*cpi99
	gen hrly_earn19 = hrly_earn99*`usd99_to_19'
	sum hrly_earn*

	// make sure we're using actual year earnings obtained 
	// (from here on 'year' is the earnings year = survey year - 1)
	gen earn_year = year - 1 
	drop year 
	rename earn_year year 

	*** 2. Specifications 
	tab year srcearn // only available from 1987 

	// 1) all hrly earnings 
	gen hrearn_v1 = hrly_earn19 
	label var hrearn_v1 "Hourly earnings in 2019 USD, ALL"

	// 2) only include wage/salary 
	// (source of income from the longest job was wage/salary)
	gen hrearn_v2 = hrly_earn19
	fre srcearn
	replace hrearn_v2 = . if srcearn!=1 & year>=1987
	label var hrearn_v2 "Hourly earnings in 2019 USD, WAGE/SALARY ONLY"

	// 3) only include wage/salary + extreme values adjusted 
	// truncating below $1 and above $100 in 1979 USD 
	// following Cha and Weeden 2014
	gen hrearn_v3 = hrearn_v2
	gen hrearn_1979 = hrearn_v3/`usd79_to_19'
		//: 2019 USD -> 1979 USD with the exact inverse of the factor used to
		//: convert back. The earlier hard-coded 0.644*0.436 was not the
		//: inverse of 2.295*1.535, so untruncated wages came back ~1% lower.
	// Overwrite only the truncated observations, so every untruncated
	// hrearn_v3 stays identical to hrearn_v2.
	replace hrearn_v3 = 1*`usd79_to_19' if hrearn_1979<=1 
	replace hrearn_v3 = 100*`usd79_to_19' if hrearn_1979>=100 & hrearn_1979!=. 
	replace hrearn_1979 = 1 if hrearn_1979<=1 
	replace hrearn_1979 = 100 if hrearn_1979>=100 & hrearn_1979!=. 
	replace hrearn_v3 = . if srcearn!=1 & year>=1987
	sum hrearn_v3, det 

	label var hrearn_v3 "Hourly earnings in 2019 USD, WAGE/SALARY ONLY & TRUNCATED"

	// all measures 
	sum hrearn_v*
end


* Build the hourly-earnings measures (hrearn_v1-v3). This also shifts 'year'
* to the year in which the earnings were received.
cpsincome 


// counting # of females and males in each occ
gen fem = 1 if sex==2 
gen male = 1 if sex==1 
	
// drop invalid occupations 
keep if inrange(occ1990, 3, 889)
	
* Weighted sex composition of every occupation X year cell 
preserve 
collapse (sum) malenum = male (sum) femnum = fem [pw=asecwt], by(occ1990 year)
sort occ1990 year 
gen fempp = femnum / (malenum + femnum)
gen malepp = malenum / (malenum + femnum)
// Stata treats missing as larger than any number, so an unguarded
// "fempp>=.6" would flag a cell with an undefined share as BOTH 60%+ female
// and 60%+ male. Leave the dummies missing in that case instead.
gen fem60dum = fempp>=.6 if !missing(fempp)
gen male60dum = malepp>=.6 if !missing(malepp)
tab year fem60dum, row
tab year male60dum, row
tempfile occpp 
save `occpp'
restore 
	
* Attach the occupation flags to the person-level records 
merge m:1 occ1990 year using `occpp', gen(occmerge)
tab occmerge 
tab statefip fem60dum, row 
tab statefip male60dum, row 
	
//tab statefip male60dum if year==2015 

// The merged person-level data stay in memory and preserve/restore protects
// them below, so the scratch file "occ60_ASEC_temp.dta" that used to be
// written to the working directory is no longer needed. Dropping it also
// removes the closing -rm- call, which only exists in Stata for Mac/Unix
// and aborted the script on Windows.

* calculate median wage for 60%+ male and 60%+ female occupation, by state and year 
preserve
keep if male60dum==1 
collapse (p50) earn_m    = hrearn_v1 ///
	     (p50) wage_m    = hrearn_v2 ///
		 (p50) wagetrc_m = hrearn_v3 [pw=asecwt], by(statefip year)
sort statefip year 
tempfile male60 
save `male60'
restore 
	
preserve 
keep if fem60dum==1
collapse (p50) earn_f    = hrearn_v1 ///
	     (p50) wage_f    = hrearn_v2 ///
		 (p50) wagetrc_f = hrearn_v3 [pw=asecwt], by(statefip year)
sort statefip year 
tempfile fem60 
save `fem60'
restore 
	
* Combine the two sets of medians and form the male/female ratios. 
* A state-year present in only one file keeps missing ratios. 
use `male60', clear 
merge 1:1 statefip year using `fem60', nogen
sort statefip year 
gen mf60ratio_earn = earn_m / earn_f
gen mf60ratio_wage = wage_m / wage_f 
gen mf60ratio_wagetrc = wagetrc_m / wagetrc_f
sum mf60ratio*
	
bys statefip: sum mf60ratio* if inrange(year, 1982, 2020)

* Copy of statefip carrying two-letter postal abbreviations as value labels 
gen state_a = statefip
label define state_a  1 "AL" 2 "AK" 5 "AR" 4 "AZ" 6 "CA" 8 "CO" 9 "CT" 10 "DE" ///
		11 "DC" 12 "FL" 13 "GA" 15 "HI" 16 "ID" 19 "IA" 17 "IL" 18 "IN" 20 "KS" 21 "KY" 22 "LA" ///
		25 "MA" 24 "MD" 23 "ME" 26 "MI" 27 "MN" 29 "MO" 28 "MS" 30 "MT" 37 "NC" 38 "ND" ///
		31 "NE" 33 "NH" 34 "NJ" 35 "NM" 32 "NV" 36 "NY" 39 "OH" 40 "OK" 41 "OR" 42 "PA" ///
		44 "RI" 45 "SC" 46 "SD" 47 "TN" 48 "TX" 49 "UT" 51 "VA" 50 "VT" 53 "WA" 55 "WI" ///
		54 "WV" 56 "WY", replace
label val state_a state_a	

keep year state_a statefip mf60ratio* 
	
save "data/state/03_occ60ratio_CPS.dta", replace	


